SpatialPCA, or spatial probabilistic PCA, is a spatially aware dimension reduction method that aims to infer a low dimensional representation of the gene expression data in spatial transcriptomics. SpatialPCA builds upon the probabilistic version of PCA, incorporates localization information as additional input, and uses a kernel matrix to explicitly model the spatial correlation structure across tissue locations.
The inferred low dimensional components from SpatialPCA contain spatial correlation information and can be paired with various analytic tools already developed in scRNA-seq studies to enable effective and novel downstream analyses for spatial transcriptomics. The examined downstream analyses of spatial transcriptomics include spatial transcriptomics visualization, spatial domain detection, trajectory inference on the tissue, and high-resolution spatial map reconstruction.
library(devtools)
## Loading required package: usethis
install_github("shangll123/SpatialPCA")
## Skipping install of 'SpatialPCA' from a github remote, the SHA1 (877192be) has not changed since last install.
## Use `force = TRUE` to force installation
library(SpatialPCA)
library(ggplot2)
library(bluster)
library(ggpubr)
library(slingshot)
Load in processed data: a normalized gene expression matrix (m gene by n locations) and a location matrix (n locations by d dimension).
expr=readRDS(system.file("extdata", "ST_tumor_expr.RData", package = "SpatialPCA"))
info=readRDS(system.file("extdata", "ST_tumor_info.RData", package = "SpatialPCA"))
ls()
## [1] "expr" "info"
dim(expr)
## [1] 319 607
dim(info)
## [1] 607 2
We first prepare data for SpatialPCA functions.
# We select number of spatial PCs to be 20, and use Gaussian kernel in this data.
dat = data_prepare_func(expr, info, PCnum = 20, kerneltype = "gaussian")
## [1] "Input expression data: 319 genes on 607 locations."
## [1] "Kernel matrix built!"
## [1] "Eigen decomposition on kernel matrix finished!"
We run SpatialPCA functions to extract spatial PCs:
Est_para = SpatialPCA_estimate_parameter(dat_input=dat)
Est_W = SpatialPCA_estimate_W(Est_para$par, dat)
Est_Z = SpatialPCA_estimate_Z(Est_para$par,dat,Est_W)
## [1] 1
## [1] 2
## [1] 3
## [1] 4
## [1] 5
## [1] 6
## [1] 7
## [1] 8
## [1] 9
## [1] 10
## [1] 11
## [1] 12
## [1] 13
## [1] 14
## [1] 15
## [1] 16
## [1] 17
## [1] 18
## [1] 19
## [1] 20
dat$Est_para = Est_para
dat$Est_W = Est_W
dat$Est_Z = Est_Z
dat$Z_spatial = Est_Z$Z_hat
We visualize the first three spatial PCs on their tissue locations.
p_PCs = plot_factor_value(info,dat$Z_spatial,textmethod="SpatialPCA",pointsize=3.5,textsize=15)
ggarrange(p_PCs[[1]], p_PCs[[2]], p_PCs[[3]],
ncol = 3, nrow = 1)
We summarize the inferred spatial PCs into three UMAP or tSNE components and visualize the three resulting components with red/green/blue (RGB) colors in the RGB plot.
p_tsne = plot_RGB_tSNE(info,dat$Z_spatial,pointsize=3.5,textsize=15)$figure
p_umap = plot_RGB_UMAP(info,dat$Z_spatial,pointsize=3.5,textsize=15)$figure
print(ggarrange(p_tsne, p_umap, ncol = 3, nrow = 1))
Clustering of tissue locations are performed based on the inferred low-dimensional components from SpatialPCA. Tissue domains are detected by SpatialPCA clustering results.
walktrap_cluster_SpatialPCA = walktrap_clustering(knearest = c(31), latent_dat=dat$Z_spatial)
## [1] 1
cbp_spatialpca <- c( "plum1", "dodgerblue","mediumaquamarine", "palegreen4","chocolate1","lightblue2","#F0E442","red","#CC79A7","mediumpurple","seagreen1")
clusterlabel = walktrap_cluster_SpatialPCA$cluster_label[[1]]
colnames(info) = c("sdimx","sdimy")
loc1 = info$sdimx
loc2 = info$sdimy
datt = data.frame(clusterlabel, loc1, loc2)
p = ggplot(datt, aes(x = loc1, y = loc2, color = clusterlabel)) +
geom_point( alpha = 1,size=3.5) +
scale_color_manual(values = cbp_spatialpca)+
theme_void()+
theme(plot.title = element_text(size = 10, face = "bold"),
text = element_text(size = 10),
legend.position = "bottom")
print(p)
Spatial PCs can also be paired with existing trajectory inference algorithms (we used slingshot) to infer the spatial trajectories across tissue locations.
meta_data = info
meta_data$SpatialPCA_Walktrap = walktrap_cluster_SpatialPCA$cluster_label[[1]]
sim<- SingleCellExperiment(assays = expr)
reducedDims(sim) <- SimpleList(DRM = t(dat$Z_spatial))
colData(sim)$Walktrap <- factor(meta_data$SpatialPCA_Walktrap)
sim <-slingshot(sim, clusterLabels = 'Walktrap', reducedDim = 'DRM',start.clus="5" ) # in this data we set tumor region as start cluster, one can change to their preferred start region
summary(sim@colData@listData)
## Length Class Mode
## Walktrap 607 factor numeric
## slingshot 607 PseudotimeOrdering S4
## slingPseudotime_1 607 -none- numeric
## slingPseudotime_2 607 -none- numeric
## slingPseudotime_3 607 -none- numeric
We visualize the three trajectories inferred by SpatialPCA on the original data. Arrows point from tissue locations with low pseudo-time to tissue locations with high pseudo-time.
pseudotime_traj1 = sim@colData@listData$slingPseudotime_1
pseudotime_traj2 = sim@colData@listData$slingPseudotime_2
pseudotime_traj3 = sim@colData@listData$slingPseudotime_3
clusterlabels = meta_data$SpatialPCA_Walktrap
gridnum = 10
color_in = c( "plum1", "dodgerblue","mediumaquamarine", "palegreen4", "chocolate1","lightblue2","#F0E442", "black","#CC79A7","mediumpurple","seagreen1")
p_traj1 = plot_trajectory(pseudotime_traj1, info,clusterlabels,gridnum,color_in,pointsize=4 ,arrowlength=0.2,arrowsize=0.8,textsize=15 )
p_traj2 = plot_trajectory(pseudotime_traj2, info,clusterlabels,gridnum,color_in,pointsize=4 ,arrowlength=0.2,arrowsize=0.8,textsize=15 )
p_traj3 = plot_trajectory(pseudotime_traj3, info,clusterlabels,gridnum,color_in,pointsize=4 ,arrowlength=0.2,arrowsize=0.8,textsize=15 )
print(ggarrange( p_traj1[[4]],p_traj2[[4]],p_traj3[[4]],p_traj1[[1]],p_traj2[[1]],p_traj3[[1]],
ncol = 3, nrow = 2))
In SpatialPCA, the correlation among spatial PCs allows us to construct a refined spatial map with a resolution much higher than that of the original study.
Z_high = high_resolution(dat)
## [1] 1
## [1] 2
## [1] 3
## [1] 4
## [1] 5
## [1] 6
## [1] 7
## [1] 8
## [1] 9
## [1] 10
## [1] 11
## [1] 12
## [1] 13
## [1] 14
## [1] 15
## [1] 16
## [1] 17
## [1] 18
## [1] 19
## [1] 20
walktrap_SpatialPCA_highresolution = walktrap_clustering(knearest = 84, Z_high$Z_star)
## [1] 1
cbp_high = c( "plum1", "palegreen4","mediumaquamarine", "dodgerblue", "chocolate1", "#F0E442","lightblue2")
loc1=unlist(Z_high$Location_star[,1])
loc2=unlist(Z_high$Location_star[,2])
Z_high_clusters = walktrap_SpatialPCA_highresolution$cluster_label[[1]]
p3 = plot_cluster(loc1,loc2,Z_high_clusters,pointsize=2,text_size=10 ,title_in="High-resolution",cbp_high)
print(p3)
We then perform trajectory inference on the high-resolution spatial map.
simhigh = SingleCellExperiment(Z_high$Z_star)
reducedDims(simhigh) = SimpleList(DRM = t(Z_high$Z_star))
colData(simhigh)$Walktrap = factor(Z_high_clusters)
simhigh = slingshot(simhigh, clusterLabels = 'Walktrap', reducedDim = 'DRM',start.clus="5") # still set tumor region as start cluster in slingshot
summary(simhigh@colData@listData)
## Length Class Mode
## Walktrap 2428 factor numeric
## slingshot 2428 PseudotimeOrdering S4
## slingPseudotime_1 2428 -none- numeric
## slingPseudotime_2 2428 -none- numeric
## slingPseudotime_3 2428 -none- numeric
Visualization of the three trajectories inferred on the constructed high-resolution spatial map. Arrows point from locations with low pseudo-time to locations with high pseudo-time. The trajectories inferred from the high-resolution spatial PCs display consistent but refined spatial patterns.
Z_high_pseudotime_traj1 = simhigh@colData@listData$slingPseudotime_1
Z_high_pseudotime_traj2 = simhigh@colData@listData$slingPseudotime_2
Z_high_pseudotime_traj3 = simhigh@colData@listData$slingPseudotime_3
cluster = Z_high_clusters
gridnum = 20
color_in = c( "plum1", "palegreen4","mediumaquamarine", "dodgerblue", "chocolate1",
"#F0E442","lightblue2", "black")
p_traj1 = plot_trajectory(Z_high_pseudotime_traj1, Z_high$Location_star,cluster,gridnum,color_in,pointsize=1.5 ,arrowlength=0.2,arrowsize=0.8,textsize=15 )
p_traj2 = plot_trajectory(Z_high_pseudotime_traj2, Z_high$Location_star,cluster,gridnum,color_in,pointsize=1.5 ,arrowlength=0.2,arrowsize=0.8,textsize=15 )
p_traj3 = plot_trajectory(Z_high_pseudotime_traj3, Z_high$Location_star,cluster,gridnum,color_in,pointsize=1.5 ,arrowlength=0.2,arrowsize=0.8,textsize=15 )
print(ggarrange( p_traj1[[4]],p_traj2[[4]],p_traj3[[4]],p_traj1[[1]],p_traj2[[1]],p_traj3[[1]],
ncol = 3, nrow = 2))